In [25]:
import pandas as pd
import openpyxl
import xlrd
import os
%run ./

In [59]:
# Root of the data directory. Other cells build sub-paths ("raw",
# "consolidated/...") by plain string concatenation onto this value, so the
# trailing slash is required.
data_path = "./data/"

In [56]:
# Names of the raw survey files to consolidate.
# os.listdir() returns entries in arbitrary order, which would make the row
# order of the consolidated frame depend on the filesystem. Sort
# case-insensitively (with the exact name as tie-breaker) so a re-run always
# processes the files in the same order.
# os.path.join keeps the path correct whether or not data_path ends in "/".
list_dir = sorted(
    os.listdir(os.path.join(data_path, "raw")),
    key=lambda name: (name.casefold(), name),
)


In [34]:
# Consolidate every raw survey file into a single DataFrame.
#
# NOTE(review): this cell was not valid Python as saved -- both `except`
# clauses had no matching `try:` and the reader calls were indented under a
# header line that is missing from this copy. The reconstruction below sends
# ".xlsx" files to get_data() and everything else to get_data_xls(). That split
# is inferred from the recorded errors (".DS_Store" failed inside xlrd, while
# the ".xlsx" failures did not) -- confirm against the original notebook.
frames = []            # one DataFrame per file that was read successfully
fname_error = dict()   # file name -> exception raised while reading it
for fname in list_dir:
    # presumably get_data parses .xlsx workbooks and get_data_xls legacy .xls
    # ones; both helpers are defined outside this cell -- TODO confirm
    reader = get_data if fname.lower().endswith(".xlsx") else get_data_xls
    try:
        frames.append(reader(fname, data_path))
    except Exception as e:
        # Best effort: remember why this file failed and keep going, so one
        # unreadable file does not abort the whole consolidation.
        fname_error[fname] = e

# DataFrame.append was removed in pandas 2.0 and copied the accumulated frame
# on every iteration; concatenate once instead.
# sort=True orders the union of columns when the per-file headers differ, which
# is what DataFrame.append did by default before pandas 1.0. The positional
# column rename in a later cell presumably relies on that order -- TODO confirm.
data = pd.concat(frames, sort=True) if frames else pd.DataFrame()

In [35]:
# Distinct question labels present in the consolidated frame, in order of
# first appearance.
questions = data['question'].unique()

array(['10. Ability to Pay for Medical Expenses',
       '40. Ability to Recover from Unexpected Expenses',
       '4. Absence of Constraints in different aspects of life - Confident',
       '61. Agreement on Statements on Corruption in Local Offices - A LOT',
       '61. Agreement on Statements on Corruption in Local Offices - ALL',
       '61. Agreement on Statements on Corruption in Local Offices - FEW',
       '61. Agreement on Statements on Corruption in Local Offices - NONE',
       '62. Agreement on Statements on Corruption in National Offices - A LOT',
       '62. Agreement on Statements on Corruption in National Offices - ALL',
       '62. Agreement on Statements on Corruption in National Offices - FEW',
       '62. Agreement on Statements on Corruption in National Offices - NONE',
       '51. Agreement on Statements on Employment - Agree',
       '15. Agreement on Statements on Employment - Agree',
       '51. Agreement on Statements on Employment - Disagree',
       '15. Agreement on Statements on Employment - Disagree',
       '60. Agreement on Statements on Governance - Agree',
       '60. Agreement on Statements on Governance - Disagree',
       '57. Agreement on Statements on Justice - Agree',
       '57. Agreement on Statements on Justice - Disagree',
       '6. Confidence Achieving Personal Aspirations',
       '9. Confidence Family Good Health',
       '7. Confidence in Achieving Desired Life Status',
       '26. Confidence on Ability to Buy a House',
       '48. Confidence on Ability to Send Children to School - Confident',
       '48. Confidence on Ability to Send Children to School - Not Confident',
       '50. Constraints to Education: BIG HINDRANCE',
       '50. Constraints to Education: NOT HINDRANCE',
       '50. Constraints to Education: SMALL HINDRANCE',
       '43. Current Constraints to Saving Money: BIG HINDRANCE',
       '43. Current Constraints to Saving Money: NOT HINDRANCE',
       '43. Current Constraints to Saving Money: SMALL HINDRANCE',
       '38. Current Life Status', '42. Current Savings',
       '1. Desired Life Status', '16. Desired Occupation',
       'S5. Educational Attainment',
       '58. Fairness of Treatment from Gov Agencies - Fair',
       '58. Fairness of Treatment from Gov Agencies - Unfair',
       '63. Fear of Insurgencies',
       '2. Ideas of Life Status - DWELLING/ HOUSE',
       '2. Ideas of Life Status - EDUCATION OF CHILDREN',
       '2. Ideas of Life Status - FINANCES',
       '2. Ideas of Life Status - OCCUPATION',
       '2. Ideas of Life Status - VACATION',
       '2. Ideas of Life Status - VEHICLE/TRANSPORTATION',
       '13. Importance of Education for Children',
       '35. Importance of Peace and Sec for National Development',
       '30. Important Economic Attainment: Rank 1',
       '33. Important Gov Services for Better Future: Rank 1',
       '41. Incidence of Saving in the Past Year',
       'S4. Level of education currently in',
       '5. Outlook for Children: They will finish their studies.',
       '39. Perceived Income Level ---- SCALE',
       '17. Preferred Type of Occupation ---- SCALE',
       '52. Presence of Opportunities for Employment or Business',
       '47. Satisfaction on Health Offices in the Community: NOT SATISFIED',
       '47. Satisfaction on Health Offices in the Community: SATISFIED',
       '49. Satisfaction on Schools in Community: DISSATISFIED',
       '49. Satisfaction on Schools in Community: SATISFIED',
       '18. Support Upon Retirement',
       '8. Whether can have Savings for the family',
       '46. Whether Have Access to Hospital',
       '54. Whether Have Job Security',
       '55. Whether Have Plan to Have a Business',
       '44. Whether Owns House Currently Live in',
       '45. Whether Worried About Hospital Bills',
       '53. Whether Worried on Having Job or Source of Income'], dtype=object)

In [36]:


In [37]:


The following files could not be read (file name → error raised):

In [38]:

{'.DS_Store': xlrd.biffh.XLRDError("Unsupported format, or corrupt file: Expected BOF record; found b'\\x00\\x00\\x00\\x01Bud1'"),
 'Confidence-that-Filipinos-Will-Have-High-Standard-of-Living.xlsx': ValueError('Plan shapes are not aligned'),
 'Importance-of-Peace-and-Sec-for-Personal-Prosperity.xlsx': ValueError('Plan shapes are not aligned'),
 'Preferred-Community-to-Live-in.xlsx': ValueError('Plan shapes are not aligned'),
 'Preferred-Dwelling-type.xlsx': ValueError('Plan shapes are not aligned'),
 'Preferred-Work-Location.xlsx': ValueError('Plan shapes are not aligned'),
 'Source-of-Financing-in-Buying-a-House.xlsx': ValueError('Plan shapes are not aligned'),
 'current-level-of-educationS5-educational-attainment.xlsx': zipfile.BadZipFile('File is not a zip file')}

In [41]:
# Replace the column labels with descriptive ones. The assignment is
# positional, so the order here must match the current column order of `data`
# exactly (21 labels).
data.columns = (
    # age brackets
    ['Age 15-19', 'Age 20-29', 'Age 30-39', 'Age 40-50']
    # socio-economic class
    + ['SEC AB', 'SEC ABC (NET)', 'SEC C1', 'SEC C2', 'SEC D', 'SEC E']
    # gender
    + ['Gender Female', 'Gender Male']
    # remaining columns, in the order they occur in the frame
    + ['Work Status Not Working', 'Response', 'Locale Rural', 'Work Status Student']
    + ['TOTAL', 'Total', 'Local Urban', 'Work Status Working', 'question']
)

In [60]:
# Select the columns in presentation order: question and response first, then
# the overall total, then the age / SEC / gender / work-status / locale
# breakdowns. This picks 20 of the 21 labels assigned to data.columns; 'TOTAL'
# is left out.
# NOTE(review): the statement ends in a line continuation with nothing after
# it -- the chained call appears to be missing from this copy of the notebook.
# Presumably it wrote the consolidated file that is read back further down;
# restore it from the original before running.
data[['question','Response','Total','Age 15-19', 'Age 20-29', 'Age 30-39', \
      'Age 40-50', 'SEC AB', 'SEC ABC (NET)', 'SEC C1', 'SEC C2', 'SEC D', \
      'SEC E', 'Gender Female', 'Gender Male', 'Work Status Not Working', \
      'Work Status Student','Work Status Working',\
      'Local Urban', 'Locale Rural']].\

Final Consolidated CSV file:

In [61]:
# Load the pipe-delimited consolidated file from disk for inspection.
df2 = pd.read_csv(
    data_path + "consolidated/Ambisyon2040SurveyDataConsolidatedB.csv",
    delimiter="|",
)

In [62]:

question Response Total Age 15-19 Age 20-29 Age 30-39 Age 40-50 SEC AB SEC ABC (NET) SEC C1 SEC C2 SEC D SEC E Gender Female Gender Male Work Status Not Working Work Status Student Work Status Working Local Urban Locale Rural
0 10. Ability to Pay for Medical Expenses Unweighted 10000 1511 3057 2967 2465 60 2280 493 1727 4784 2936 4998 5002 4058 1015 4927 4620 5380
1 10. Ability to Pay for Medical Expenses Weighted 10000 1978 3211 2594 2217 60 2260 482 1717 4820 2921 4968 5032 4042 1305 4653 4569 5431
2 10. Ability to Pay for Medical Expenses NaN % % % % % % % % % % % % % % % % % %
3 10. Ability to Pay for Medical Expenses CONFIDENT 78.6 81.7 81 77.5 73.8 98.3 87.6 89.8 86.6 79.7 70 77.8 79.5 76.9 84.6 78.5 81.4 76.3
4 10. Ability to Pay for Medical Expenses Definitely confident (4.00) 28.4 30.9 29.2 26.5 27.3 63.2 36.7 39.3 35 28.3 22.2 28 28.8 26.4 33.6 28.7 28.2 28.6
5 10. Ability to Pay for Medical Expenses Somewhat confident (3.00) 50.2 50.8 51.8 50.9 46.5 35.1 50.9 50.5 51.6 51.4 47.7 49.8 50.7 50.5 51 49.7 53.2 47.7
6 10. Ability to Pay for Medical Expenses NOT CONFIDENT 20.6 17.3 18.5 21.6 25.3 1.7 11.7 10 12.5 19.6 29 21.5 19.6 22.2 15.1 20.6 17.9 22.8
7 10. Ability to Pay for Medical Expenses Somewhat not confident (2.00) 15.6 14.5 13.9 16.4 18.3 1.7 9.1 8.6 9.5 15.4 21.1 16.4 14.9 16.9 12.8 15.3 14.3 16.8
8 10. Ability to Pay for Medical Expenses Definitely not confident (1.00) 4.9 2.8 4.6 5.2 6.9 - 2.6 1.4 3 4.2 7.9 5.1 4.8 5.3 2.3 5.3 3.6 6
9 10. Ability to Pay for Medical Expenses REFUSED - * - - - - * - * - - - - - - - - -
10 10. Ability to Pay for Medical Expenses DON’T KNOW 0.8 1 0.5 0.9 0.9 - 0.7 * 0.8 0.7 1 0.7 0.9 0.9 * 0.9 0.7 0.8
11 40. Ability to Recover from Unexpected Expenses NaN NaN AGE NaN NaN NaN NaN SEC NaN NaN NaN NaN NaN GENDER NaN NaN WORK STATUS LOCALE NaN
12 40. Ability to Recover from Unexpected Expenses NaN Total 15-19 20-29 30-39 40-50 AB ABC (NET) C1 C2 D E Female Male Not Working Student Working Urban Rural
13 40. Ability to Recover from Unexpected Expenses BASE- Total inteviews: NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
14 40. Ability to Recover from Unexpected Expenses Unweighted 10000.0 1511.0 3057.0 2967.0 2465.0 60.0 2280.0 493.0 1727.0 4784.0 2936.0 4998.0 5002.0 4058.0 1015.0 4927.0 4620.0 5380.0
15 40. Ability to Recover from Unexpected Expenses Weighted 10000.0 1978.0 3211.0 2594.0 2217.0 60.0 2260.0 482.0 1717.0 4820.0 2921.0 4968.0 5032.0 4042.0 1305.0 4653.0 4569.0 5431.0
16 40. Ability to Recover from Unexpected Expenses NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
17 40. Ability to Recover from Unexpected Expenses NaN % % % % % % % % % % % % % % % % % %
18 40. Ability to Recover from Unexpected Expenses NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
19 40. Ability to Recover from Unexpected Expenses Has the capacity to recover from small expense... 65.9 64.3 65.3 67.0 67.0 35.5 62.3 54.4 65.4 69.7 62.5 66.1 65.8 66.3 64.0 66.2 65.5 66.3
20 40. Ability to Recover from Unexpected Expenses NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
21 40. Ability to Recover from Unexpected Expenses Has the capacity to recover from big expenses 22.3 23.5 23.6 21.6 20.1 62.9 33.6 42.3 30.1 21.2 15.3 22.5 22.1 21.5 25.0 22.3 24.9 20.1
22 40. Ability to Recover from Unexpected Expenses NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
23 40. Ability to Recover from Unexpected Expenses Has no capacity to recover even from small exp... 11.3 11.5 10.6 11.1 12.6 1.6 3.6 2.5 4.0 8.6 21.8 11.1 11.6 11.8 10.3 11.2 9.0 13.4
24 40. Ability to Recover from Unexpected Expenses NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
25 40. Ability to Recover from Unexpected Expenses Refused (vol.) * * * * - - * - * * * * * * * * * *
26 40. Ability to Recover from Unexpected Expenses NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
27 40. Ability to Recover from Unexpected Expenses Don't Know (vol.) * 0.6 * * * - 0.5 0.8 * * * * * * 0.6 * 0.6 *
28 4. Absence of Constraints in different aspects... Unweighted 10000 1511 3057 2967 2465 60 2280 493 1727 4784 2936 4998 5002 4058 1015 4927 4620 5380
29 4. Absence of Constraints in different aspects... Weighted 10000 1978 3211 2594 2217 60 2260 482 1717 4820 2921 4968 5032 4042 1305 4653 4569 5431
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1035 45. Whether Worried About Hospital Bills WORRY 15.0 15.6 15.3 14.7 14.3 39.5 22.6 31.5 19.6 13.9 10.8 14.7 15.2 14.4 15.0 15.4 18.5 12.0
1036 45. Whether Worried About Hospital Bills Definitely will worry (1.00) 58.5 56.9 57.9 58.8 60.5 34.6 48.7 39.0 52.0 59.7 64.2 59.8 57.2 59.5 57.2 58.0 52.6 63.5
1037 45. Whether Worried About Hospital Bills Will worry (2.00) 26.4 27.4 26.5 26.4 25.2 25.9 28.5 29.3 28.4 26.1 25.0 25.2 27.5 25.8 27.8 26.4 28.7 24.4
1038 45. Whether Worried About Hospital Bills NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1039 45. Whether Worried About Hospital Bills WON'T WORRY 84.9 84.4 84.4 85.1 85.7 60.5 77.3 68.3 80.4 85.8 89.2 85.1 84.7 85.3 85.0 84.4 81.3 87.9
1040 45. Whether Worried About Hospital Bills Will worry a little (3.00) 13.4 13.8 13.9 13.3 12.6 29.2 19.7 27.5 17.2 12.6 9.8 13.2 13.6 13.1 13.3 13.7 16.6 10.8
1041 45. Whether Worried About Hospital Bills Will not worry (4.00) 1.6 1.8 1.4 1.4 1.7 10.3 2.9 4.0 2.4 1.3 0.9 1.5 1.6 1.3 1.7 1.7 1.9 1.2
1042 45. Whether Worried About Hospital Bills NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1043 45. Whether Worried About Hospital Bills REFUSED * - * * - - * * - * - * * * - * * *
1044 45. Whether Worried About Hospital Bills NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1045 45. Whether Worried About Hospital Bills DON’T KNOW - - - * - - - - - - - - - - - - - -
1046 53. Whether Worried on Having Job or Source of... NaN NaN AGE NaN NaN NaN NaN SEC NaN NaN NaN NaN NaN GENDER NaN NaN WORK STATUS LOCALE NaN
1047 53. Whether Worried on Having Job or Source of... NaN Total 15-19 20-29 30-39 40-50 AB ABC (NET) C1 C2 D E Female Male Not Working Student Working Urban Rural
1048 53. Whether Worried on Having Job or Source of... BASE- Total interviews: NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1049 53. Whether Worried on Having Job or Source of... Unweighted 10000.0 1511.0 3057.0 2967.0 2465.0 60.0 2280.0 493.0 1727.0 4784.0 2936.0 4998.0 5002.0 4058.0 1015.0 4927.0 4620.0 5380.0
1050 53. Whether Worried on Having Job or Source of... Total 10000.0 1978.0 3211.0 2594.0 2217.0 60.0 2260.0 482.0 1717.0 4820.0 2921.0 4968.0 5032.0 4042.0 1305.0 4653.0 4569.0 5431.0
1051 53. Whether Worried on Having Job or Source of... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1052 53. Whether Worried on Having Job or Source of... NaN % % % % % % % % % % % % % % % % % %
1053 53. Whether Worried on Having Job or Source of... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1054 53. Whether Worried on Having Job or Source of... I worry about my job or source of income alway... 36.6 31.1 35.8 37.4 41.6 21.7 29.2 26.6 30.2 34.7 45.4 37.1 36.1 38.8 28.9 36.8 30.8 41.4
1055 53. Whether Worried on Having Job or Source of... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1056 53. Whether Worried on Having Job or Source of... I worry about my job or source of income often 30.0 29.6 30.3 31.0 28.8 18.9 24.1 18.6 25.8 33.1 29.6 29.7 30.3 31.1 26.5 30.1 30.5 29.6
1057 53. Whether Worried on Having Job or Source of... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1058 53. Whether Worried on Having Job or Source of... I seldom worry about my job or source of income 25.1 26.6 26.2 24.2 23.1 25.9 33.3 39.7 31.8 24.0 20.4 25.0 25.2 23.2 29.6 25.4 27.5 23.0
1059 53. Whether Worried on Having Job or Source of... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1060 53. Whether Worried on Having Job or Source of... I do not worry about my job or source of income 7.1 9.4 6.8 6.7 6.0 31.7 12.2 14.5 10.9 6.8 3.7 7.0 7.2 6.0 10.7 7.1 9.6 5.0
1061 53. Whether Worried on Having Job or Source of... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1062 53. Whether Worried on Having Job or Source of... Refused (vol.) 0.8 1.5 0.7 0.7 * 1.7 0.6 * 0.6 1.0 * 0.8 0.8 0.6 1.8 0.6 0.9 0.6
1063 53. Whether Worried on Having Job or Source of... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1064 53. Whether Worried on Having Job or Source of... Don't Know (vol.) * 1.9 * * * - 0.6 * 0.7 * * * * * 2.4 - 0.6 *

1065 rows × 20 columns

In [ ]: